<?php

namespace Tian\Model;

use Think\Model;
use QueryList;

class WebsiteModel extends Model {

    /**
     * Download a stylesheet and store it under "<website_name>/css/".
     *
     * The local file name is the basename of the remote URL. Nothing is
     * written when the download fails, so no empty stylesheet is left behind.
     *
     * @param string $url_css_remote Location readable by file_get_contents().
     * @param string $website_name   Target directory of the mirrored site.
     * @return void
     */
    function getCss($url_css_remote, $website_name) {
        $dir_css = $website_name . "/css/";
        $data = file_get_contents($url_css_remote);
        if ($data === false) {
            return;
        }
        if (!is_dir($dir_css)) {
            // Recursive: the site directory itself may not exist yet.
            mkdir($dir_css, 0777, true);
        }
        file_put_contents($dir_css . basename($url_css_remote), $data);
    }

    /**
     * Download a script, post-process it and store it under "<website_name>/js/".
     *
     * The content is passed through the project helper str_replace_common()
     * and the file name through getAsk() before writing.
     * NOTE(review): getAsk() presumably strips a "?query" suffix -- confirm
     * against its definition.
     *
     * Nothing is written when the download fails.
     *
     * @param string $url_js_remote Location readable by file_get_contents().
     * @param string $website_name  Target directory of the mirrored site.
     * @return void
     */
    function getJs($url_js_remote, $website_name) {
        $dir_js = $website_name . "/js/";
        $content = file_get_contents($url_js_remote);
        if ($content === false) {
            return;
        }
        $js_name = getAsk(basename($url_js_remote));
        if (!is_dir($dir_js)) {
            // Recursive: the site directory itself may not exist yet.
            mkdir($dir_js, 0777, true);
        }
        file_put_contents($dir_js . $js_name, str_replace_common($content));
    }

    /**
     * Download one asset into "website/<website_name>/<category>/".
     *
     * The category sub-directory comes from the project helper getFileCat().
     * The local file name is the last path segment of $file_cur with any
     * "?query" part removed.
     *
     * @param string $file_cur     Location readable by file_get_contents().
     * @param string $website_name Site directory name below "website/".
     * @return void
     */
    function getImages($file_cur, $website_name) {
        $website_name = 'website/' . $website_name;

        // Drop the query string first so a "/" inside it cannot be mistaken
        // for the last path separator.
        $parts = explode("?", $file_cur, 2);
        $path = $parts[0];
        $slash = strrpos($path, '/');
        // strrpos() returns false when there is no slash; "false + 1" would
        // silently cut off the first character of the name.
        $name = ($slash === false) ? $path : substr($path, $slash + 1);
        if ($name === '') {
            return;
        }

        $type = getFileCat($file_cur);
        $imageFile = $website_name . "/" . $type . "/";
        if (!is_dir($imageFile)) {
            // Recursive: creates "website/" and the site directory as needed.
            mkdir($imageFile, 0777, true);
        }

        $file_get = file_get_contents($file_cur);
        if ($file_get === false) {
            // Download failed: do not write an empty file.
            return;
        }
        file_put_contents($imageFile . $name, $file_get);
    }

    /**
     * Collect the files referenced by url(...) in a stylesheet and rewrite
     * the references to "../images/<name>".
     *
     * The rewritten stylesheet is written back to dirname($url_css), i.e. over
     * the input file, so $url_css has to be a writable location.
     *
     * NOTE(review): the original never defined the image directory. It is
     * derived here from the rewritten references ("../images/" relative to
     * the stylesheet) -- confirm this is the intended location.
     *
     * @param string $url_css Stylesheet location.
     * @return void
     */
    function getCssImagesNew($url_css) {
        $dir_css = dirname($url_css) . "/";
        // Directory that the rewritten "../images/<name>" references point at.
        $dir_image = $dir_css . "../images/";

        // Scheme + host part, used to resolve root-relative references.
        $host = '';
        if (preg_match('/(.*\/\/.*?)\//', $dir_css, $hostMatch)) {
            $host = $hostMatch[1];
        }

        $data = file_get_contents($url_css);
        if ($data === false) {
            return;
        }

        $regex = '/url\(\'{0,1}\"{0,1}(.*?)\'{0,1}\"{0,1}\)/';
        preg_match_all($regex, $data, $result);

        $imagesReplace = array();
        foreach ($result[1] as $val) {
            // Only references of the form ".../<name>.<ext>" are handled.
            if (!preg_match('/.*\/(.*\.\D+)$/', $val, $name)) {
                continue;
            }
            if (preg_match('/^http.*/', $val)) {
                $target = $val;
            } else if (preg_match('/^\/.*/', $val)) {
                $target = $host . $val;
            } else {
                $target = $dir_css . $val;
            }
            if (!is_dir($dir_image)) {
                mkdir($dir_image, 0777, true);
            }
            // Copy only the file belonging to this reference; the original
            // looped over every name seen so far and could store the current
            // target under an earlier name.
            if (!is_file($dir_image . $name[1])) {
                copy($target, $dir_image . $name[1]);
            }
            $imagesReplace[] = $name[0];
        }

        $new = $data;
        foreach (array_unique($imagesReplace) as $v) {
            $new = str_replace($v, "../images/" . basename($v), $new);
        }

        file_put_contents($dir_css . basename($url_css), $new);
    }

    /**
     * Download the images and fonts referenced by url(...) in a stylesheet,
     * rewrite the references to "../images/<name>" or "../fonts/<name>", and
     * save the rewritten stylesheet.
     *
     * Classification relies on the project helpers is_img2() (download target)
     * and is_img() (reference rewriting); file names are normalised through
     * get_extension() and getAsk(). Their exact behaviour is defined outside
     * this file.
     *
     * The stylesheet location is echoed followed by "<hr>".
     *
     * @param string $url_css      Stylesheet location.
     * @param string $website_name Target directory of the mirrored site.
     * @param string $site_url     Base URL used for root-relative references.
     * @return void
     */
    function getCssImages($url_css, $website_name, $site_url) {
        $dir_image = $website_name . "/images/";
        $dir_css = $website_name . "/css/";
        $dir_fonts = $website_name . "/fonts/";
        $url = dirname($url_css) . "/";
        echo $url_css . "<hr>";

        $data = file_get_contents($url_css);
        if ($data === false) {
            // Download failed: do not write an empty stylesheet.
            return;
        }
        foreach (array($dir_image, $dir_css) as $dir) {
            if (!is_dir($dir)) {
                mkdir($dir, 0777, true);
            }
        }

        $regex = '/url\(\'{0,1}\"{0,1}(.*?)\'{0,1}\"{0,1}\)/';
        preg_match_all($regex, $data, $result);

        // Always an array, so a stylesheet without url() no longer breaks
        // the de-duplication below.
        $imagesReplace = array();
        foreach ($result[1] as $val) {
            // Inline data URIs carry their payload; there is nothing to
            // download and rewriting them would corrupt the stylesheet.
            if ($val === '' || strncasecmp($val, 'data:', 5) === 0) {
                continue;
            }
            if (preg_match('/^http.*/', $val)) {
                $target = $val;
            } else if (substr($val, 0, 2) === '//') {
                // Protocol-relative reference: borrow the site's scheme.
                $scheme = parse_url($site_url, PHP_URL_SCHEME);
                $target = ($scheme ? $scheme : 'http') . ':' . $val;
            } else if (preg_match('/^\/.*/', $val)) {
                $target = $site_url . $val;
            } else {
                $target = $url . $val;
            }

            $basename = basename($val);
            if (is_img2($val) == 1) {
                $dir_local = $dir_image;
            } else {
                if (!is_dir($dir_fonts)) {
                    mkdir($dir_fonts, 0777, true);
                }
                $dir_local = $dir_fonts;
            }

            $local = $dir_local . $basename;
            // Split on the last dot of the file name only; a dot in a
            // directory name (e.g. "example.com/") must not be used.
            $dot = strrpos($basename, '.');
            if ($dot !== false) {
                $local = $dir_local . substr($basename, 0, $dot) . "." . get_extension($local);
            }
            if (!is_file($local)) {
                copy($target, $local);
            }
            $imagesReplace[] = $val;
        }

        $new = $data;
        foreach (array_unique($imagesReplace) as $v) {
            $img_name = basename($v);
            if (is_img($img_name) == 1) {
                $new = str_replace($v, "../images/" . $img_name, $new);
            } else if (strstr($img_name, ".")) {
                $new = str_replace($v, "../fonts/" . $img_name, $new);
            }
        }

        if (is_img(basename($url_css)) == 1) {
            file_put_contents($dir_image . basename($url_css), $new);
        } else {
            file_put_contents($dir_css . getAsk(basename($url_css)), $new);
        }
    }

    /**
     * Return the given strings ordered from longest to shortest.
     *
     * Strings of equal length are ordered descending by value. Keys of the
     * input are discarded; the result is a plain list.
     *
     * @param array $arr List of strings (e.g. link URLs).
     * @return array The same strings, longest first; empty for empty input.
     */
    function getOrderDesc($arr) {
        if (empty($arr)) {
            // array_multisort() cannot be called without data.
            return array();
        }
        $lengths = array();
        $values = array();
        foreach ($arr as $v) {
            $lengths[] = strlen($v);
            $values[] = $v;
        }
        // Primary key: length (descending); ties: value (descending).
        array_multisort($lengths, SORT_DESC, SORT_NUMERIC, $values, SORT_DESC);
        return $values;
    }

    /**
     * Remove a fixed list of unwanted fragments from an HTML string.
     *
     * The list holds a right-click blocking statement, an empty script tag
     * and an invisible character (presumably a byte-order mark -- confirm).
     *
     * @param string $new HTML source.
     * @return string HTML with every listed fragment removed.
     */
    function getHtmlEmpty($new) {
        $unwanted = array(
            'document.oncontextmenu=function(e){return false;}',
            '<script type="text/javascript"></script>',
            '﻿',
        );
        // With an array of needles str_replace() applies them one after
        // another, in order.
        return str_replace($unwanted, "", $new);
    }

    /**
     * Apply the built-in and the per-site string replacements to a page.
     *
     * Built-in pairs: the page charset name becomes "utf-8" and links to "/"
     * become links to "index.html". Additional old/new pairs are read from the
     * JSON stored in the "exchange" column of the website row. Finally the
     * result is passed through the project helper str_replace_common().
     *
     * @param string $new     HTML source.
     * @param mixed  $id      Primary key of the website row.
     * @param string $charset Charset name found in the page.
     * @return string The rewritten HTML.
     */
    function getHtmlExchange($new, $id, $charset) {
        $arr = array(
            array('old' => $charset, 'new' => 'utf-8'),
            array('old' => "href='/'", 'new' => "href='index.html'"),
            array('old' => 'href="/"', 'new' => 'href="index.html"'),
        );

        // Array condition instead of a concatenated string, so $id can never
        // become part of the SQL text.
        $website = M("website")->field("exchange")->where(array('id' => intval($id)))->find();
        if (!empty($website['exchange'])) {
            $exchanges = json_decode($website['exchange'], true);
            // Malformed JSON decodes to null; ignore it rather than iterate it.
            if (is_array($exchanges)) {
                foreach ($exchanges as $v) {
                    if (isset($v['old'], $v['new'])) {
                        $arr[] = array('old' => $v['old'], 'new' => $v['new']);
                    }
                }
            }
        }

        foreach ($arr as $v) {
            // An empty search string has nothing to replace.
            if ($v['old'] === null || $v['old'] === '') {
                continue;
            }
            $new = str_replace($v['old'], $v['new'], $new);
        }
        $new = str_replace_common($new);
        return $new;
    }

    /**
     * Send a page to the tool.oschina.net HTML formatter and return the
     * formatted markup.
     *
     * The page content is POSTed over plain HTTP to a third-party endpoint
     * with a 5 second timeout. The user agent and referer come from the
     * _USERAGENT_ and _REFERER_ constants defined elsewhere in the project.
     *
     * @param string $page_html Location of the page, readable by file_get_contents().
     * @return string|null The "fhtml" field of the JSON response, or null when
     *                     the page cannot be read, the request fails or the
     *                     response is not the expected JSON.
     */
    function htmlFormatCurl($page_html) {
        $content = file_get_contents($page_html);
        if ($content === false) {
            return null;
        }

        $url = "http://tool.oschina.net/action/format/html";
        $post_data = array("html" => $content);
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_setopt($ch, CURLOPT_USERAGENT, _USERAGENT_);
        curl_setopt($ch, CURLOPT_REFERER, _REFERER_);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
        $output = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on timeout or connection failure.
        if ($output === false) {
            return null;
        }
        $json = json_decode($output, true);
        if (!is_array($json) || !isset($json['fhtml'])) {
            return null;
        }
        return $json['fhtml'];
    }

    /**
     * Point image references in a page at the local "images/" directory.
     *
     * Two sources are collected: the src attribute of <img> tags whose value
     * ends in a known image extension (passed through the project helper
     * getImgFirst()), and every url(...) reference in the page. Each collected
     * reference that is_img() accepts is replaced by "images/<basename>".
     *
     * @param string $new     HTML source.
     * @param mixed  $website Passed through to getImgFirst().
     * @return string The rewritten HTML.
     */
    function getHtmlImages($new, $website) {
        // The original pattern used character classes ("[img|IMG]",
        // "[\.gif|\.jpg]") where alternations were meant, so it matched any
        // tag starting with one of those letters and any value ending in one
        // of those characters. This one matches real <img> tags and
        // extensions.
        $imgPattern = '/<img\b[^>]*?src=[\'"]([^\'"]*?\.(?:gif|jpe?g|png|bmp|webp|svg|ico))[\'"][^>]*>/i';
        preg_match_all($imgPattern, $new, $imagesMatch1);
        $imagesArr1 = getImgFirst($imagesMatch1[1], $website);

        $regex = '/url\(\'{0,1}\"{0,1}(.*?)\'{0,1}\"{0,1}\)/';
        preg_match_all($regex, $new, $imagesMatch2);
        $imagesArr2 = $imagesMatch2[1];

        if ($imagesArr1 && $imagesArr2) {
            $merge = array_merge($imagesArr1, $imagesArr2);
        } else {
            $merge = $imagesArr1 ? $imagesArr1 : $imagesArr2;
        }
        if (!is_array($merge)) {
            return $new;
        }

        // De-duplicate: replacing a bare name such as "logo.png" twice would
        // yield "images/images/logo.png".
        foreach (array_unique($merge) as $v) {
            $name = basename($v);
            $local = 'images/' . $name;
            if ($v === '' || $v === $local) {
                continue;
            }
            if (is_img($name) == 1) {
                $new = str_replace($v, $local, $new);
            }
        }
        return $new;
    }

    /**
     * Point the stylesheet links of a page at the local "css/" directory.
     *
     * Every <link href> whose extension (after getAsk()) is "css" is handed to
     * the project helper getCurlPerFile(). When that returns more than one
     * file, the whole <link> tag is replaced by one <link> per file; otherwise
     * only the href value is replaced by "css/<basename>".
     * NOTE(review): getCurlPerFile() presumably fetches the stylesheet and
     * returns the list of files it resolved to -- confirm.
     *
     * Links that are not stylesheets (e.g. favicons) are left untouched.
     *
     * @param string $new          HTML source.
     * @param string $website_name Passed through to getCurlPerFile().
     * @return string The rewritten HTML.
     */
    function getHtmlCss($new, $website_name) {
        preg_match_all('/<link.+?href=(\'|")(.+?)\\1/s', $new, $cssMatch);

        // De-duplicate: replacing a bare name such as "style.css" twice would
        // yield "css/css/style.css".
        foreach (array_unique($cssMatch[2]) as $v) {
            if (getAsk(get_extension($v)) != 'css') {
                continue;
            }
            $cssRemoteArr = getCurlPerFile($v, $website_name);

            if (is_array($cssRemoteArr) && count($cssRemoteArr) > 1) {
                $links = array();
                foreach ($cssRemoteArr as $v2) {
                    $links[] = "<link rel='stylesheet' href= 'css/" . basename($v2) . "'>";
                }
                $cssNew = implode("\n", $links);
                // Replace the whole tag. The original put the generated tags
                // inside the href attribute, which produced invalid markup.
                $tagPattern = '/<link\b[^>]*?href=(\'|")' . preg_quote($v, '/') . '\\1[^>]*>/is';
                $replaced = preg_replace_callback($tagPattern, function () use ($cssNew) {
                    return $cssNew;
                }, $new);
                if ($replaced !== null) {
                    $new = $replaced;
                }
            } else {
                $new = str_replace($v, "css/" . basename($v), $new);
            }
        }
        return $new;
    }

    /**
     * Point the external scripts of a page at the local "js/" directory.
     *
     * Every <script src> whose extension (after getAsk()) is "js" is handed to
     * the project helper getCurlPerFile(). When that returns more than one
     * file, the whole <script ...></script> element is replaced by one script
     * element per file; otherwise only the src value is replaced by
     * "js/<basename>".
     * NOTE(review): getCurlPerFile() presumably fetches the script and returns
     * the list of files it resolved to -- confirm.
     *
     * @param string $new          HTML source.
     * @param string $website_name Passed through to getCurlPerFile().
     * @return string The rewritten HTML.
     */
    function getHtmlJs($new, $website_name) {
        $pattern = '/<script.+?src=(\'|")(.+?)\\1/s';
        preg_match_all($pattern, $new, $jsMatch);

        // De-duplicate: replacing a bare name such as "app.js" twice would
        // yield "js/js/app.js".
        foreach (array_unique($jsMatch[2]) as $v) {
            if (getAsk(get_extension($v)) != 'js') {
                continue;
            }
            $jsRemoteArr = getCurlPerFile($v, $website_name);

            if (is_array($jsRemoteArr) && count($jsRemoteArr) > 1) {
                $tags = array();
                foreach ($jsRemoteArr as $v2) {
                    $tags[] = "<script src= 'js/" . basename($v2) . "'></script>";
                }
                $jsNew = implode("\n", $tags);
                // Replace the element that carries this src. The original
                // always targeted the second match ($jsMatch[0][1]) regardless
                // of which script was being processed.
                $tagPattern = '/<script\b[^>]*?src=(\'|")' . preg_quote($v, '/') . '\\1[^>]*>\s*<\/script>/is';
                $replaced = preg_replace_callback($tagPattern, function () use ($jsNew) {
                    return $jsNew;
                }, $new);
                if ($replaced !== null) {
                    $new = $replaced;
                }
            } else {
                $new = str_replace($v, "js/" . basename($v), $new);
            }
        }
        // The original ended with one more replacement using whatever $v and
        // $jsNew were left over from the last iteration. It could prefix
        // "js/" a second time or inject tags into an attribute, so it is
        // dropped.
        return $new;
    }

    /**
     * Replace the text of the page title with a new name.
     *
     * The text of the first <title> element is looked up and every occurrence
     * of that text in the document is replaced by $name_post.
     * NOTE(review): the replacement is document-wide, not limited to the
     * <title> element -- confirm that this is intended.
     *
     * @param string $new       HTML source.
     * @param string $name_post Replacement title text.
     * @param string $code_post Unused; kept for interface compatibility.
     * @return string The rewritten HTML, or $new unchanged when the page has
     *                no usable title.
     */
    function getHtmlTitle($new, $name_post, $code_post) {
        // Lazy match so that several <title> elements on one line (e.g. inside
        // inline SVG) are not captured as one span.
        if (!preg_match('/<title>(.*?)<\/title>/is', $new, $titleMatch)) {
            return $new;
        }
        $title = $titleMatch[1];
        // Nothing sensible to search for in an empty or blank title.
        if (!trim($title)) {
            return $new;
        }
        return str_replace($title, $name_post, $new);
    }

    /**
     * Rewrite the links of a page to the local page files.
     *
     * The "json" column of the website row holds entries with "urls" and
     * "codes". Every <a href> value equal to an entry's "urls" is replaced by
     * "<codes>.html", or by an empty string when the code is "index". Longer
     * hrefs are processed first so that a short URL cannot clobber part of a
     * longer one.
     *
     * @param string $new HTML source.
     * @param mixed  $id  Primary key of the website row.
     * @return string The rewritten HTML.
     */
    function getHtmlHref($new, $id) {
        // Array condition instead of a concatenated string, so $id can never
        // become part of the SQL text.
        $detail = M('website')->field("json")->where(array('id' => intval($id)))->find();
        $jsons = empty($detail['json']) ? null : json_decode($detail['json'], true);
        if (!is_array($jsons)) {
            return $new;
        }

        preg_match_all('/<a href=[\'\"]?([^\'\" ]+).*?>/', $new, $hrefMacth);
        // De-duplicate: handling the same href twice could rewrite the
        // freshly inserted "<code>.html" again.
        $hrefArr = array_unique($hrefMacth[1]);
        if (empty($hrefArr)) {
            return $new;
        }

        foreach ($this->getOrderDesc($hrefArr) as $v) {
            foreach ($jsons as $v2) {
                if (!isset($v2['urls'], $v2['codes']) || (string) $v2['urls'] !== $v) {
                    continue;
                }
                if ($v2['codes'] == 'index') {
                    $new = str_replace($v, "", $new);
                } else {
                    $new = str_replace($v, $v2['codes'] . ".html", $new);
                }
            }
        }
        return $new;
    }

    /**
     * Rewrite href attributes of a page to the local page files, using the
     * anchors found on the live page.
     *
     * The anchors of $url_post are collected with QueryList. The "json" column
     * of the website row supplies entries with "urls" and "codes"; each entry
     * is compared in its site-relative form ("urls") and, when available, its
     * absolute form ("urls2"). Matching href attributes become "<codes>.html".
     *
     * @param string $new      HTML source.
     * @param mixed  $id       Primary key of the website row.
     * @param string $url_post URL of the live page handed to QueryList.
     * @return string The rewritten HTML.
     */
    function getReplaceHref($new, $id, $url_post) {
        // Array condition instead of a concatenated string, so $id can never
        // become part of the SQL text.
        $detail = M('website')->field("json,website")->where(array('id' => intval($id)))->find();
        $jsons = empty($detail['json']) ? null : json_decode($detail['json'], true);
        if (!is_array($jsons)) {
            return $new;
        }
        $site = isset($detail['website']) ? (string) $detail['website'] : '';

        $pattern = array("a" => array("a", "href"));
        $qy = new QueryList($url_post, $pattern, '', '', 'utf-8');
        $rs = $qy->jsonArr;
        if (!is_array($rs)) {
            return $new;
        }

        // Normalise every entry: "urls" without the site prefix, "urls2" with it.
        foreach ($jsons as $k => $v) {
            if (!isset($v['urls']) || $site === '') {
                continue;
            }
            if (strpos($v['urls'], $site) !== false) {
                $url_replace = str_replace($site, "", $v['urls']);
                if (substr($url_replace, 0, 4) != 'http') {
                    $jsons[$k]['urls2'] = $site . $url_replace;
                }
                $jsons[$k]['urls'] = $url_replace;
            }
        }

        foreach ($rs as $v) {
            // Skip anchors without an href: with loose comparison an empty
            // value matched every entry that had no "urls2".
            if (!isset($v['a']) || $v['a'] === '') {
                continue;
            }
            $href = (string) $v['a'];
            foreach ($jsons as $v2) {
                if (!isset($v2['codes'])) {
                    continue;
                }
                $isRelative = isset($v2['urls']) && (string) $v2['urls'] === $href;
                $isAbsolute = isset($v2['urls2']) && (string) $v2['urls2'] === $href;
                if (!$isRelative && !$isAbsolute) {
                    continue;
                }
                $url_last = $v2['codes'] . ".html";
                $new = str_replace("href='" . $href . "'", "href='" . $url_last . "'", $new);
                $new = str_replace('href="' . $href . '"', 'href="' . $url_last . '"', $new);
                // NOTE(review): hard-coded, site-specific rewrite kept from the
                // original -- confirm whether it is still needed.
                $new = str_replace('href="/service/"', 'href="service.html"', $new);
            }
        }
        return $new;
    }

    /**
     * Remove analytics script elements from a page.
     *
     * Every <script>...</script> element whose text mentions one of the
     * listed tracker hosts is deleted from the HTML.
     *
     * @param string $new HTML source.
     * @return string HTML without the matching script elements.
     */
    function getHtmlStatics($new) {
        $trackers = array("hm.baidu.com", "cnzz.com");
        preg_match_all('/<script[\s\S]*?<\/script>/i', $new, $found);

        // Collect the offending elements first, then strip them in one call;
        // str_replace() processes an array of needles in order.
        $doomed = array();
        foreach ($found[0] as $script) {
            foreach ($trackers as $domain) {
                if (strpos($script, $domain) > 0) {
                    $doomed[] = $script;
                }
            }
        }
        return str_replace($doomed, "", $new);
    }

    /**
     * Final image pass: fetch <img> sources that are not local yet and point
     * image-typed <link> hrefs at "images/".
     *
     * Every <img> src ending in a known image extension and not already
     * starting with "images" is resolved with the project helper
     * getCurlImg(), downloaded through getImages() and rewritten to
     * "images/<basename>". Every <link href> that is_img2() accepts is
     * rewritten to "images/<basename>" as well; it is not downloaded here.
     *
     * @param string $new          HTML source.
     * @param mixed  $website      Passed through to getCurlImg().
     * @param string $website_name Site directory name, passed to getImages().
     * @return string The rewritten HTML.
     */
    function getLastImages($new, $website, $website_name) {
        // The original pattern used a character class ("[\.gif|\.jpg]") where
        // an alternation was meant, so it accepted any value ending in one of
        // those characters. This one matches real image extensions.
        $imgPattern = '/<img\b[^>]*?src=[\'"]([^\'"]*?\.(?:gif|jpe?g|png|bmp|webp|svg|ico))[\'"][^>]*>/i';
        preg_match_all($imgPattern, $new, $imagesMatch1);

        // De-duplicate: avoids downloading the same image twice and prefixing
        // a bare name with "images/" twice.
        foreach (array_unique($imagesMatch1[1]) as $v) {
            if (substr($v, 0, 6) != 'images') {
                $img_last = getCurlImg($v, $website);
                $this->getImages($img_last, $website_name);
                $new = str_replace($v, 'images/' . basename($v), $new);
            }
        }

        preg_match_all('/<link.+?href=(\'|")(.+?)\\1/s', $new, $cssMatch);
        foreach (array_unique($cssMatch[2]) as $v) {
            if (!$v || is_img2($v) != 1) {
                continue;
            }
            $local = 'images/' . basename($v);
            if ($v !== $local) {
                $new = str_replace($v, $local, $new);
            }
        }
        return $new;
    }

}
?>

